{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {
    "run_control": {
     "marked": true
    }
   },
   "source": [
    "## Description：\n",
    "这里是通过一个简单的小案例来看看如何通过掉包的方式使用FM模型"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-09-25T11:25:35.088692Z",
     "start_time": "2020-09-25T11:25:34.403532Z"
    }
   },
   "outputs": [],
   "source": [
    "# 导入包\n",
    "from pyfm import pylibfm\n",
    "from sklearn.feature_extraction import DictVectorizer\n",
    "import numpy as np"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "使用这个类最简单的方式就是把数据存成字典的形式， 然后用DictVectorizer进行one-hot"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-09-25T11:32:15.011587Z",
     "start_time": "2020-09-25T11:32:15.003609Z"
    }
   },
   "outputs": [],
   "source": [
    "train = [\n",
    "    {'user': '1', 'item': '5', 'age': 19},\n",
    "    {'user': '2', 'item': '43', 'age': 33},\n",
    "    {'user': '3', 'item': '20', 'age': 55},\n",
    "    {'user': '4', 'item': '10', 'age': 20}\n",
    "]\n",
    "v = DictVectorizer()\n",
    "X = v.fit_transform(train)      # 本身被压缩了"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-09-25T11:32:48.927201Z",
     "start_time": "2020-09-25T11:32:48.922214Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[19.,  0.,  0.,  0.,  1.,  1.,  0.,  0.,  0.],\n",
       "       [33.,  0.,  0.,  1.,  0.,  0.,  1.,  0.,  0.],\n",
       "       [55.,  0.,  1.,  0.,  0.,  0.,  0.,  1.,  0.],\n",
       "       [20.,  1.,  0.,  0.,  0.,  0.,  0.,  0.,  1.]])"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "X.toarray()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-09-25T11:33:20.254650Z",
     "start_time": "2020-09-25T11:33:20.250660Z"
    }
   },
   "outputs": [],
   "source": [
    "y = np.repeat(1, X.shape[0])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-09-25T11:33:37.234732Z",
     "start_time": "2020-09-25T11:33:37.227749Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Creating validation dataset of 0.01 of training for adaptive regularization\n",
      "-- Epoch 1\n",
      "Training log loss: 0.38084\n"
     ]
    }
   ],
   "source": [
    "fm = pylibfm.FM()\n",
    "fm.fit(X, y)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-09-25T11:34:25.719204Z",
     "start_time": "2020-09-25T11:34:25.714261Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([0.99212296])"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 测试集\n",
    "test = v.transform({'user': \"1\", 'item': \"10\", 'age': 24})\n",
    "fm.predict(test)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.8"
  },
  "toc": {
   "base_numbering": 1,
   "nav_menu": {},
   "number_sections": true,
   "sideBar": true,
   "skip_h1_title": false,
   "title_cell": "Table of Contents",
   "title_sidebar": "Contents",
   "toc_cell": false,
   "toc_position": {},
   "toc_section_display": true,
   "toc_window_display": false
  },
  "varInspector": {
   "cols": {
    "lenName": 16,
    "lenType": 16,
    "lenVar": 40
   },
   "kernels_config": {
    "python": {
     "delete_cmd_postfix": "",
     "delete_cmd_prefix": "del ",
     "library": "var_list.py",
     "varRefreshCmd": "print(var_dic_list())"
    },
    "r": {
     "delete_cmd_postfix": ") ",
     "delete_cmd_prefix": "rm(",
     "library": "var_list.r",
     "varRefreshCmd": "cat(var_dic_list()) "
    }
   },
   "types_to_exclude": [
    "module",
    "function",
    "builtin_function_or_method",
    "instance",
    "_Feature"
   ],
   "window_display": false
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
